import requests
from bs4 import BeautifulSoup
import re
import pinyin


def collect_chinese_characters():
    """Download the character page and save the extracted text to hz.txt.

    Fetches ``http://www.fuhaoku.com/zi/``, takes every ``<p>`` element,
    removes spaces and carriage returns from its markup, and keeps the
    first capture of the pattern ``(.+)</``.  The captures are concatenated
    and written to ``D:\\learn\\hz.txt``; ``'over'`` is printed when done.

    The page is fetched and parsed *before* the output file is opened, so a
    failed download no longer truncates an existing hz.txt.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        OSError: the output file could not be written.
    """
    url1 = 'http://www.fuhaoku.com/zi/'
    # A timeout keeps the script from hanging forever on a dead connection.
    res = requests.get(url1, timeout=30)
    # Do not parse (and save) an error page as if it were the character list.
    res.raise_for_status()
    data = res.content.decode('utf-8')

    soup = BeautifulSoup(data, 'html.parser')
    re_rule = re.compile(r'(.+)</')
    pieces = []
    for tag in soup.find_all('p'):
        markup = str(tag).replace(' ', '').replace('\r', '')
        found = re_rule.findall(markup)
        # Paragraphs the pattern does not match are skipped instead of
        # aborting the whole run with an IndexError.
        if found:
            pieces.append(found[0])

    # NOTE(review): the encoding is left at the platform default because the
    # rest of this file reads hz.txt without an explicit encoding; switch
    # both sides to UTF-8 together if that is wanted.
    with open(r'D:\learn\hz.txt', 'w') as f:
        f.write(''.join(pieces))
    print('over')


def few_words():
    """Split the entries of cy.txt on list punctuation and write cy1.txt.

    ``D:\\learn\\cy.txt`` is read and split on whitespace.  Every token longer
    than two characters has ``'，'``, ``';'`` and ``'、'`` replaced by spaces and
    is split again; shorter tokens are kept unchanged.  Each resulting word
    is written to ``D:\\learn\\cy1.txt`` followed by a single space.

    Raises:
        OSError: either file could not be opened, read, or written.
    """
    # Raw strings: '\l' and '\c' are not valid escape sequences and trigger
    # a warning on current Python versions when written in a normal literal.
    with open(r'D:\learn\cy.txt', 'r') as f:
        res = f.read().split()

    words = []
    for i in res:
        if len(i) > 2:
            # NOTE(review): ';' is the ASCII semicolon while the other two
            # separators are full-width; confirm '；' was not intended.
            for sep in ('，', ';', '、'):
                i = i.replace(sep, ' ')
            words.extend(i.split())
        else:
            words.append(i)

    # The output is opened only after the input was read successfully, and
    # both handles are closed even if an error occurs part-way through.
    with open(r'D:\learn\cy1.txt', 'w') as f1:
        f1.writelines(word + ' ' for word in words)


def get_ym(res):
    """Return the entry of ``ym`` that the pinyin string ``res`` ends with.

    Suffixes of ``res`` are tried from the longest (the whole string) to the
    shortest (its last character), so the longest listed suffix wins.

    Returns:
        The matching suffix, or the integer ``0`` when no suffix of ``res``
        is listed in ``ym`` (including when ``res`` is empty).
    """
    start = 0
    while start < len(res):
        tail = res[start:]
        if tail in ym:
            return tail
        start += 1
    return 0

# Finals (yunmu) and the syllables that are read as a whole.
ym = [
    'a', 'o', 'e', 'i', 'u', 'v',
    'ai', 'ei', 'ui', 'ao', 'ou', 'iu', 'ie', 've', 'er',
    'an', 'en', 'in', 'un', 'vn',
    'ang', 'eng', 'ing', 'ong',
    'iao', 'ian', 'iang', 'iong', 'uai', 'uan', 'uang',
    'zhi', 'chi', 'shi', 'ri', 'zi', 'ci', 'si',
    'yi', 'wu', 'yu', 'ye', 'yue', 'yuan', 'yin', 'yun', 'ying',
]
input_data = input('输入字符')

# Convert the input to pinyin and take the final it ends with; that final is
# what every candidate below is compared against.
res = pinyin.get(input_data, format='strip')
print(res)
res_ym = get_ym(res)

res_hz = []   # single characters from hz.txt with the same final
res_cy = []   # entries of ciyu1.txt whose last character has the same final
res_cy1 = []  # entries of chengyu.txt whose last character has the same final

# Raw strings: '\l', '\h' and '\c' are not valid escape sequences and trigger
# a warning on current Python versions when written in a normal literal.
with open(r'D:\learn\hz.txt', 'r') as f:
    data = f.read()
with open(r'D:\learn\ciyu1.txt', 'r') as f1:
    data1 = f1.read().split()
with open(r'D:\learn\chengyu.txt', 'r') as f2:
    data2 = f2.read().split()

# get_ym() returns 0 when it recognises no final.  Candidates without a
# recognisable final (whitespace, punctuation, ...) also yield 0, so without
# this guard an unrecognised input would match every such candidate.
if res_ym:
    for i in data:
        if res_ym == get_ym(pinyin.get(i, format='strip')):
            res_hz.append(i)
    for j in data1:
        if res_ym == get_ym(pinyin.get(j[-1], format='strip')):
            res_cy.append(j)
    for k in data2:
        if res_ym == get_ym(pinyin.get(k[-1], format='strip')):
            res_cy1.append(k)

print(res_hz)
print(res_cy)
print(res_cy1)